{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "toc": true
   },
   "source": [
    "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
    "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Select-variables-for-analysis\" data-toc-modified-id=\"Select-variables-for-analysis-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Select variables for analysis</a></span></li><li><span><a href=\"#Limit-data-to-subpopulation-of-states-in-19-C-West\" data-toc-modified-id=\"Limit-data-to-subpopulation-of-states-in-19-C-West-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>Limit data to subpopulation of states in 19 C West</a></span><ul class=\"toc-item\"><li><span><a href=\"#Drop--variables-missing-all-values-&amp;-change-state-names\" data-toc-modified-id=\"Drop--variables-missing-all-values-&amp;-change-state-names-2.1\"><span class=\"toc-item-num\">2.1&nbsp;&nbsp;</span>Drop  variables missing all values &amp; change state names</a></span></li></ul></li><li><span><a href=\"#Export-data-to-Stata\" data-toc-modified-id=\"Export-data-to-Stata-3\"><span class=\"toc-item-num\">3&nbsp;&nbsp;</span>Export data to Stata</a></span></li></ul></div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook takes the big V-Dem dataset (too big for Stata IC) and reduces it to a subset of relevant data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:35:37.007596Z",
     "start_time": "2021-06-28T13:35:36.579231Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('mode.chained_assignment', None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:35:37.058900Z",
     "start_time": "2021-06-28T13:35:37.055526Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Volumes/GoogleDrive-107745440581041782819/My Drive/00_Researching/16_SocialScientization/-04_EJS/00_replication/\n",
      "/Volumes/GoogleDrive-107745440581041782819/My Drive/00_Researching/16_SocialScientization/-04_EJS/00_replication/00_data/01_ivs/\n"
     ]
    }
   ],
   "source": [
    "DIRECTORY = os.path.dirname(os.getcwd()) + '/'\n",
    "print(DIRECTORY)\n",
    "IVS = DIRECTORY + '00_data/01_ivs/'\n",
    "print(IVS)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Select variables for analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:22.485674Z",
     "start_time": "2021-06-28T13:48:22.483480Z"
    }
   },
   "outputs": [],
   "source": [
    "variables = \"\"\"year country_name  e_regiongeo  \n",
    "v2x_suffr  e_pelifeex   v2peprisch e_civil_war e_miinteco \n",
    "e_miinterc   v3stnatant v3stcitlaw v3stnatbank \n",
    "v3stflag v3stcensus v3ststatag v3ststybcov v3ststybpub  \n",
    " v2canuni   v2svindep \"\"\"\n",
    "variables = variables.split()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:25.612086Z",
     "start_time": "2021-06-28T13:48:23.551020Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "vdem = pd.read_csv(IVS + \"vdem10.csv\", usecols=variables)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:25.617198Z",
     "start_time": "2021-06-28T13:48:25.614085Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(27013, 19)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vdem.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Limit data to subpopulation of states in 19 C West"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:25.643044Z",
     "start_time": "2021-06-28T13:48:25.636062Z"
    }
   },
   "outputs": [],
   "source": [
    "west = vdem.loc[(vdem['e_regiongeo'] < 5) | (vdem['e_regiongeo'] == 16)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:25.832686Z",
     "start_time": "2021-06-28T13:48:25.827940Z"
    }
   },
   "outputs": [],
   "source": [
    "west = west.loc[(west['year'] < 1915) & (west['year'] > 1799)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:26.062404Z",
     "start_time": "2021-06-28T13:48:26.058601Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3796, 19)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "west.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Drop  variables missing all values & change state names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:26.716046Z",
     "start_time": "2021-06-28T13:48:26.710588Z"
    }
   },
   "outputs": [],
   "source": [
    "west.dropna(how='all', axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:26.918487Z",
     "start_time": "2021-06-28T13:48:26.913057Z"
    }
   },
   "outputs": [],
   "source": [
    "west = west.dropna(how='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:27.097890Z",
     "start_time": "2021-06-28T13:48:27.094774Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3796, 19)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "west.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:28.447960Z",
     "start_time": "2021-06-28T13:48:28.444719Z"
    }
   },
   "outputs": [],
   "source": [
    "west.replace(\"Würtemberg\", \"Wuertemberg\", inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:28.781428Z",
     "start_time": "2021-06-28T13:48:28.760392Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country_name</th>\n",
       "      <th>year</th>\n",
       "      <th>v2x_suffr</th>\n",
       "      <th>v2svindep</th>\n",
       "      <th>v2peprisch</th>\n",
       "      <th>v2canuni</th>\n",
       "      <th>v3stcensus</th>\n",
       "      <th>v3stcitlaw</th>\n",
       "      <th>v3stflag</th>\n",
       "      <th>v3stnatant</th>\n",
       "      <th>v3stnatbank</th>\n",
       "      <th>v3ststatag</th>\n",
       "      <th>v3ststybcov</th>\n",
       "      <th>v3ststybpub</th>\n",
       "      <th>e_regiongeo</th>\n",
       "      <th>e_pelifeex</th>\n",
       "      <th>e_civil_war</th>\n",
       "      <th>e_miinteco</th>\n",
       "      <th>e_miinterc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>362</th>\n",
       "      <td>Sweden</td>\n",
       "      <td>1800</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>32.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>363</th>\n",
       "      <td>Sweden</td>\n",
       "      <td>1801</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>36.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>364</th>\n",
       "      <td>Sweden</td>\n",
       "      <td>1802</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>365</th>\n",
       "      <td>Sweden</td>\n",
       "      <td>1803</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366</th>\n",
       "      <td>Sweden</td>\n",
       "      <td>1804</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>39.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    country_name  year  v2x_suffr  v2svindep  v2peprisch  v2canuni  \\\n",
       "362       Sweden  1800        0.0        1.0         NaN       5.0   \n",
       "363       Sweden  1801        0.0        1.0         NaN       5.0   \n",
       "364       Sweden  1802        0.0        1.0         NaN       5.0   \n",
       "365       Sweden  1803        0.0        1.0         NaN       5.0   \n",
       "366       Sweden  1804        0.0        1.0         NaN       5.0   \n",
       "\n",
       "     v3stcensus  v3stcitlaw  v3stflag  v3stnatant  v3stnatbank  v3ststatag  \\\n",
       "362         1.0         0.0       1.0         0.0          0.0         1.0   \n",
       "363         0.0         0.0       1.0         0.0          0.0         1.0   \n",
       "364         0.0         0.0       1.0         0.0          0.0         1.0   \n",
       "365         0.0         0.0       1.0         0.0          0.0         1.0   \n",
       "366         0.0         0.0       1.0         0.0          0.0         1.0   \n",
       "\n",
       "     v3ststybcov  v3ststybpub  e_regiongeo  e_pelifeex  e_civil_war  \\\n",
       "362          0.0          0.0            2        32.2          NaN   \n",
       "363          0.0          0.0            2        36.9          NaN   \n",
       "364          0.0          0.0            2        40.2          NaN   \n",
       "365          0.0          0.0            2        40.3          NaN   \n",
       "366          0.0          0.0            2        39.7          NaN   \n",
       "\n",
       "     e_miinteco  e_miinterc  \n",
       "362         0.0         0.0  \n",
       "363         0.0         0.0  \n",
       "364         0.0         0.0  \n",
       "365         0.0         0.0  \n",
       "366         0.0         0.0  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "west.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export data to Stata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-06-28T13:48:30.454217Z",
     "start_time": "2021-06-28T13:48:30.406032Z"
    }
   },
   "outputs": [],
   "source": [
    "west.to_stata(IVS + \"west_vdem10.dta\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": true,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
